<?php
// Site character encoding; also advertised to the browser in the Content-Type header.
define('CHARSET', 'utf-8');

// Only send the header while that is still possible. This keeps the original
// best-effort behaviour (no "headers already sent" warning) without using the
// @ operator, which would also hide unrelated problems.
if (!headers_sent()) {
    header("Content-type: text/html; charset=" . CHARSET);
}
/**
 * Copy a remote (or local) file to a local path, retrying a bounded number of times.
 *
 * Each attempt is a single copy() call. The function stops at the first
 * successful attempt, so a file is never transferred more often than needed,
 * and it gives up after $maxAttempts failures instead of retrying forever.
 *
 * @param string $url         Source URL or path, passed to copy() unchanged.
 * @param string $path        Destination file path, passed to copy() unchanged.
 * @param int    $maxAttempts Maximum number of copy() attempts; values below 1 are treated as 1.
 *
 * @return bool True as soon as one attempt succeeds, false when all attempts fail.
 */
function copyFile($url, $path, $maxAttempts = 3)
{
    $attempts = max(1, (int) $maxAttempts);

    for ($attempt = 0; $attempt < $attempts; $attempt++) {
        if (copy($url, $path)) {
            return true;
        }
    }

    return false;
}

/**
 * Create a directory together with any missing parent directories.
 *
 * Nothing is created when $path is empty or when something (file or
 * directory) already exists at $path.
 *
 * @param string $path Directory path to create.
 *
 * @return bool True when $path is a directory once the call returns, false otherwise.
 */
function createDir($path)
{
    $path = (string) $path;

    // dirname('') is '' again, so an empty path must be rejected explicitly
    // rather than walked upwards.
    if ($path === '' || file_exists($path)) {
        return is_dir($path);
    }

    // The third argument makes mkdir() create the missing parents itself.
    // The trailing is_dir() accepts a directory that another process created
    // between the file_exists() check and the mkdir() call.
    if (!mkdir($path, 0777, true) && !is_dir($path)) {
        return false;
    }

    return true;
}

/**
 * Report whether a file or directory exists at the given path.
 *
 * @param string $filename Path to test.
 *
 * @return bool True when file_exists() finds the path, false otherwise.
 */
function checkFile($filename)
{
    // file_exists() already yields a boolean, so it can be returned directly.
    return file_exists($filename);
}

/**
 * Extract the file name (with extension) from a URL.
 *
 * The primary pattern takes the last path segment that ends in
 * ".<lowercase letters>" and drops whatever follows the extension within that
 * segment (for example a "?v=1.2" query string). When the pattern does not
 * match, the base name of the URL's path component is used instead.
 *
 * @param string $url URL to inspect.
 *
 * @return string The file name, or '' when the URL carries no usable name.
 */
function retrieve($url)
{
    $url = (string) $url;

    if (preg_match('/\/([^\/]+\.[a-z]+)[^\/]*$/', $url, $match) === 1) {
        return $match[1];
    }

    // No "name.ext" segment (extension-less path, upper-case extension,
    // trailing slash, ...): fall back to the last segment of the path so the
    // function never reads a missing match group.
    $path = parse_url($url, PHP_URL_PATH);

    return is_string($path) ? basename($path) : '';
}

/**
 * Download every resource referenced through url(...) in a stylesheet.
 *
 * Each reference is turned into a download target: references starting with
 * "http" are used as they are, references starting with "/" are appended to
 * the scheme-and-host part of $url, and anything else is appended to $url.
 * "../" sequences are removed from the combined string rather than resolved.
 * The local directory is the target with $url removed, relative to the current
 * working directory. A short report of reachable and unreachable targets is
 * echoed when the scan is done.
 *
 * @param string $css_url Location of the stylesheet to scan.
 * @param string $url     Base URL for relative references; also the prefix
 *                        stripped from a target to obtain its local path.
 *
 * @return void
 */
function copyImage($css_url, $url)
{
    $data = file_get_contents($css_url);
    if ($data === false) {
        // Unreadable stylesheet: there is nothing to scan, but the (empty)
        // report is still printed below.
        $data = '';
    }

    // Scheme and host of the base URL, e.g. "http://example.com".
    $host = preg_match('/(.*\/\/.*?)\//', $url, $hostMatch) ? $hostMatch[1] : '';

    $regex = '/url\(\'{0,1}\"{0,1}(.*?)\'{0,1}\"{0,1}\)/';
    preg_match_all($regex, $data, $result);

    $true_arr = array();
    $false_arr = array();

    // A stylesheet often repeats the same reference; handle each one once.
    foreach (array_unique($result[1]) as $val) {
        if (preg_match('/^http.*/', $val)) {
            $target = $val;
        } else if (preg_match('/^\/.*/', $val)) {
            $target = str_replace('../', '', $host . $val);
        } else {
            $target = str_replace('../', '', $url . $val);
        }

        // Local file name: the last segment of the reference. The fallback
        // covers references the pattern rejects, such as "bg.png" (no slash)
        // or "img/bg.png?v=1" (digits after the extension).
        if (preg_match('/.*\/(.*\.\D+)$/', $val, $name)) {
            $fileName = $name[1];
        } else {
            $refPath = parse_url($val, PHP_URL_PATH);
            $fileName = is_string($refPath) ? basename($refPath) : '';
        }

        if ($fileName === '') {
            // Without a file name the destination would be a directory path.
            $false_arr[] = $target;
            continue;
        }

        $dir1 = str_replace($url, '', $target);
        $dir2 = explode('/', $dir1);
        array_pop($dir2);
        $dir = './' . implode('/', $dir2) . '/';

        // Reachability probe. Warnings are suppressed on purpose because an
        // unreachable target is an expected outcome that is reported below.
        $handle = @fopen($target, 'r');
        if ($handle === false) {
            $false_arr[] = $target;
            continue;
        }
        fclose($handle);

        // Create exactly the directory the file is written into.
        createDir(rtrim($dir, '/'));

        $localPath = $dir . $fileName;
        if (!is_file($localPath)) {
            copyFile($target, $localPath);
        }

        // Judge success by the file being present, not by copyFile()'s return value.
        if (is_file($localPath)) {
            $true_arr[] = $target;
        } else {
            $false_arr[] = $target;
        }
    }

    // The lists hold strings taken from remote content, so escape them before
    // they are written into the HTML response.
    echo '<br>成功下载图片：<br><br>';
    echo htmlspecialchars(print_r(array_unique($true_arr), true), ENT_QUOTES | ENT_SUBSTITUTE, CHARSET);

    echo '<br>下载失败图片：<br><br>';
    echo htmlspecialchars(print_r(array_unique($false_arr), true), ENT_QUOTES | ENT_SUBSTITUTE, CHARSET);
}

/*
 * Entry point: when the form posts a page URL, fetch that page, download the
 * scripts and stylesheets it references, then the images named in each
 * downloaded stylesheet.
 */
$http_url = (isset($_POST['http_url']) && is_string($_POST['http_url'])) ? $_POST['http_url'] : '';
if ($http_url) {
    include "simple_html_dom.php";
    // Create DOM from URL or file
    $html = file_get_html($http_url);
    if (!$html) {
        // Without a parsed document there is nothing to scan, and calling
        // find() on a non-object would be a fatal error.
        echo '无法获取页面：' . htmlspecialchars($http_url, ENT_QUOTES | ENT_SUBSTITUTE, CHARSET);
        exit;
    }

    // Download destinations used below; make sure they exist before copying.
    createDir('./js');
    createDir('./css');

    // Find all script
    $script_arr = array();
    $script_null_num = 0;
    foreach ($html->find('script') as $element) {
        if ($element->src) {
            $script_arr[] = $element->src;
        } else {
            // Inline script without a src attribute: counted, not downloaded.
            $script_null_num++;
        }
    }

    // NOTE(review): src/href values are handed to copyFile() exactly as they
    // appear in the page; relative URLs are not resolved against $http_url.
    echo 'JS  脚本：'.(count($script_arr)+$script_null_num).'个<br><br>';
    // Values come from the remote page, so escape them before output.
    echo htmlspecialchars(print_r($script_arr, true), ENT_QUOTES | ENT_SUBSTITUTE, CHARSET);
    echo '<br>';
    $s_num = 0;
    foreach ($script_arr as $s_v) {
        $ret = copyFile($s_v, './js/'.retrieve($s_v));

        if ($ret) {
            $s_num++;
        } else {
            echo htmlspecialchars($s_v, ENT_QUOTES | ENT_SUBSTITUTE, CHARSET) . '<br>';
        }
    }
    echo '成功 下载 ：'.$s_num.'个<br><br>';

    // Find all link
    $link_arr = array();
    $link_null_num = 0;
    foreach ($html->find('link') as $element) {
        if ($element->href && $element->href !== '/favicon.ico') {
            $link_arr[] = $element->href;
        } else {
            $link_null_num++;
        }
    }

    echo 'CSS  脚本：'.(count($link_arr)+$link_null_num).'个<br><br>';
    echo htmlspecialchars(print_r($link_arr, true), ENT_QUOTES | ENT_SUBSTITUTE, CHARSET);
    echo '<br><br>';
    $c_num = 0;
    foreach ($link_arr as $c_v) {
        $ret = copyFile($c_v, './css/'.retrieve($c_v));

        if ($ret) {
            // NOTE(review): the image base URL is hard-coded to one specific
            // site; confirm it matches the page being downloaded.
            copyImage($c_v, 'http://static.ikeepu.com/r-20120921/');
            $c_num++;
        } else {
            echo htmlspecialchars($c_v, ENT_QUOTES | ENT_SUBSTITUTE, CHARSET) . '<br><br>';
        }
    }
    echo '<br><br>成功 下载 ：'.$c_num.'个<br><br>';


    echo '下载完毕！';

    // Stop here so the form below is not rendered after a download run.
    exit;
}
?>

<form method="post" action="takeHtml.php">
http_url:<input type = "text" name ="http_url" size="50"/>
<input type = "submit" name ="download" value="下载" />
</form>